In [1]:
# Transportation data from 2014 survey is for year 2013, etc
def _regional_ratios(overall, east, midwest, south, west):
    """Return each region's figure divided by the overall figure.

    The result is keyed by the REGION_* constants (defined elsewhere in the
    notebook) and is later used as a per-region multiplier on the base cost.
    """
    return {
        REGION_EAST: east / overall,
        REGION_MIDWEST: midwest / overall,
        REGION_SOUTH: south / overall,
        REGION_WEST: west / overall,
    }


# Survey figures keyed by year.
#   single_adults   -- multiplied by 1000 when the base cost is computed
#                      (presumably reported in thousands -- TODO confirm)
#   transport[...]  -- category aggregates; the weighted sum is multiplied by
#                      1000000 (presumably reported in millions -- TODO confirm)
#   *_percent       -- fraction of the matching aggregate that is counted
#   regional        -- per-region multiplier relative to the overall figure
cex = {
    2012: {
        "single_adults": 37770.0,
        "transport": {
            "used_car": 209764.0,
            "gasoline": 328170.0,
            "other_vehicle": 324668.0,
            "public": 67486.0,
            "used_car_percent": 0.152,
            "gasoline_percent": 0.158,
            "other_vehicle_percent": 0.191,
            "public_percent": 0.174,
            # Ratios come out to roughly 0.932, 1.023, 1.074, 0.9375
            "regional": _regional_ratios(
                17.6, east=16.4, midwest=18.0, south=18.9, west=16.5
            ),
        },
    },
    2013: {
        "single_adults": 37884.0,
        "transport": {
            "used_car": 214524.0,
            "gasoline": 313481.0,
            "other_vehicle": 345454.0,
            "public": 73842.0,
            "used_car_percent": 0.146,
            "gasoline_percent": 0.157,
            "other_vehicle_percent": 0.163,
            "public_percent": 0.172,
            # Ratios come out to roughly 0.923, 0.994, 1.076, 0.947
            "regional": _regional_ratios(
                17.0, east=15.7, midwest=16.9, south=18.3, west=16.1
            ),
        },
    },
}

# Ideal numbers from model, in the order EAST, MIDWEST, SOUTH, WEST
ideal_transport_2013 = (3764, 4569, 4697, 4054)


def _base_transport_cost(survey, multiplier):
    """Compute the base transport cost for one survey year.

    Each category aggregate is weighted by its ``*_percent`` fraction, the
    weighted aggregates are summed, scaled by 1000000, divided by
    ``single_adults * 1000`` and finally multiplied by ``multiplier``
    (the year's inflation multiplier).
    """
    transport = survey["transport"]
    weighted_total = (
        (transport["used_car"] * transport["used_car_percent"])
        + (transport["gasoline"] * transport["gasoline_percent"])
        + (transport["other_vehicle"] * transport["other_vehicle_percent"])
        + (transport["public"] * transport["public_percent"])
    )
    per_adult = 1000000 * weighted_total / float(survey["single_adults"] * 1000)
    return per_adult * multiplier


# Base price for transport
transportation_costs = defaultdict(dict)

for year in cex:
    base_cost = _base_transport_cost(cex[year], inflation_multipliers[year])
    transportation_costs[year]["base"] = base_cost

    # Account for regional drift: scale the base by each region's ratio
    for region, ratio in cex[year]["transport"]["regional"].items():
        transportation_costs[year][region] = base_cost * ratio

# Extra row holding the model's ideal values; it has no meaningful base,
# so the base is stored as a 0.0 placeholder.
transportation_costs["2014_ideal"]["base"] = 0.0
transportation_costs["2014_ideal"].update(
    zip(
        (REGION_EAST, REGION_MIDWEST, REGION_SOUTH, REGION_WEST),
        ideal_transport_2013,
    )
)

# Print it nicely
# Year whose computed costs are compared against the "2014_ideal" row.
COMPARISON_YEAR = 2013

# Fix the row order explicitly instead of relying on dict iteration order
# (arbitrary on Python 2); key=str lets the integer years and the
# "2014_ideal" label be ordered together: 2012, 2013, 2014_ideal.
years = sorted(transportation_costs, key=str)

errors = []
pt = PrettyTable()
pt.add_column("Year", years)
for region in sorted(transportation_costs[COMPARISON_YEAR].keys()):
    data = [transportation_costs[year][region] for year in years]
    pt.add_column("Trans Cost (%s)" % region, data)
    # The ideal row's "base" is a 0.0 placeholder, so comparing against it
    # would add the whole base cost to the error total; only real regions count.
    if region != "base":
        errors.append(
            transportation_costs["2014_ideal"][region]
            - transportation_costs[COMPARISON_YEAR][region]
        )

# Total absolute error between the ideal values and the comparison year
print(sum(np.abs(err) for err in errors))

# Print as HTML
HTML(pt.get_html_string())


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-c58e6d2f54f7> in <module>()
     13             "public_percent": 0.174,
     14             "regional": {
---> 15                 REGION_EAST:   16.4 / 17.6,
     16                 REGION_MIDWEST: 18.0 / 17.6,
     17                 REGION_SOUTH: 18.9 / 17.6,

NameError: name 'REGION_EAST' is not defined

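Testing a theory about regional differences: apply a separate regional ratio to each spending category instead of one overall ratio per region.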


In [ ]:
# Per-category regional ratios. Order: NE, MW, S, W
# ("rations" is kept as spelled since other cells may reference these names;
#  each entry is a regional figure divided by the overall figure --
#  presumably shares of spending, TODO confirm against the source tables.)
used_car_rations = (2.5 / 3.2, 3.5 / 3.2, 3.5 / 3.2, 2.9 / 3.2)
gas_rations = (3.8 / 4.6, 4.7 / 4.6, 5.2 / 4.6, 4.5 / 4.6)
other_rations = (5.2 / 5.1, 5.0 / 5.1, 5.1 / 5.1, 5.1 / 5.1)
public_rations = (1.6 / 1.1, 0.9 / 1.1, 0.8 / 1.1, 1.2 / 1.1)

# Start from an empty list so the total below covers only this cell's
# errors and does not grow when the cell is re-run.
errors = []
survey = cex[2013]
transport = survey["transport"]
for region in range(4):
    # Same weighted sum as the base cost, but every category is additionally
    # scaled by its own regional ratio.
    weighted_total = (
        (transport["used_car"] * transport["used_car_percent"] * used_car_rations[region])
        + (transport["gasoline"] * transport["gasoline_percent"] * gas_rations[region])
        + (transport["other_vehicle"] * transport["other_vehicle_percent"] * other_rations[region])
        + (transport["public"] * transport["public_percent"] * public_rations[region])
    )
    val = (
        1000000
        * (weighted_total / float(survey["single_adults"] * 1000))
        * inflation_multipliers[2013]
    )
    errors.append(val - ideal_transport_2013[region])

# Total absolute error against the model's ideal values
print(sum(np.abs(err) for err in errors))

In [ ]:
# Calculate regional diff values from aggregated data (since 'combined' only goes back to 2012)
# NOTE(review): the meaning of the two aggregate totals and of the percentage
# pairs below is not documented in this notebook -- confirm against the source.
TOTAL_A = 6790803
TOTAL_B = 1152035


def inverse_scaled_ratio(pct_of_total_a, pct_of_total_b):
    """Return 1 / ((TOTAL_A * 1e6 * pct_a) / (TOTAL_B * 1e6 * pct_b)).

    Both totals are scaled by 1000000 and by their percentage before the
    ratio is taken; the reciprocal of that ratio is returned.
    """
    scaled_a = TOTAL_A * 1000000 * pct_of_total_a
    scaled_b = TOTAL_B * 1000000 * pct_of_total_b
    return 1 / (scaled_a / scaled_b)


# One line of output per percentage pair, in the original order.
for pct_a, pct_b in ((20.1, 18.6), (21.7, 21.7), (34.3, 37.1), (23.9, 22.6)):
    print(inverse_scaled_ratio(pct_a, pct_b))

# Plain ratio of the two totals, for reference (float() keeps true division
# on Python 2).
print(TOTAL_B / float(TOTAL_A))